In [1]:
import pandas as pd
import numpy as np
df = pd.read_csv("county_annual.csv")
df_annualnull = df[df.isnull().any(axis=1) & (df['BUYER_STATE'] != "PR")]
print(df_annualnull)
       Unnamed: 0 BUYER_COUNTY BUYER_STATE  year  count  DOSAGE_UNIT  \
10071       10072         GUAM          GU  2006    319     265348.0   
10072       10073         GUAM          GU  2007    330     275600.0   
10073       10074         GUAM          GU  2008    313     286900.0   
10074       10075         GUAM          GU  2009    390     355300.0   
10075       10076         GUAM          GU  2010    510     413800.0   
...           ...          ...         ...   ...    ...          ...   
27751       27752          NaN          MA  2006    247     114900.0   
27752       27753          NaN          NV  2006    380     173600.0   
27753       27754          NaN          NV  2007    447     200600.0   
27754       27755          NaN          NV  2008      5       2200.0   
27755       27756          NaN          OH  2006     23       5100.0   

       countyfips  
10071         NaN  
10072         NaN  
10073         NaN  
10074         NaN  
10075         NaN  
...           ...  
27751         NaN  
27752         NaN  
27753         NaN  
27754         NaN  
27755         NaN  

[74 rows x 7 columns]
In [2]:
df_county = df.dropna(subset=['BUYER_COUNTY'])
df_county
Out[2]:
Unnamed: 0 BUYER_COUNTY BUYER_STATE year count DOSAGE_UNIT countyfips
0 1 ABBEVILLE SC 2006 877 363620.0 45001.0
1 2 ABBEVILLE SC 2007 908 402940.0 45001.0
2 3 ABBEVILLE SC 2008 871 424590.0 45001.0
3 4 ABBEVILLE SC 2009 930 467230.0 45001.0
4 5 ABBEVILLE SC 2010 1197 539280.0 45001.0
... ... ... ... ... ... ... ...
27736 27737 ZAVALA TX 2010 248 200100.0 48507.0
27737 27738 ZAVALA TX 2011 406 244800.0 48507.0
27738 27739 ZAVALA TX 2012 473 263700.0 48507.0
27739 27740 ZAVALA TX 2013 399 186700.0 48507.0
27740 27741 ZAVALA TX 2014 162 148930.0 48507.0

27741 rows × 7 columns

In [3]:
## Question 4
df_mean = df_county[['year','DOSAGE_UNIT']]
df_meandosage = df_mean.groupby(['year']).mean().reset_index()
print(df_meandosage)
   year   DOSAGE_UNIT
0  2006  2.654669e+06
1  2007  2.995906e+06
2  2008  3.254271e+06
3  2009  3.526038e+06
4  2010  3.783656e+06
5  2011  4.035583e+06
6  2012  3.993064e+06
7  2013  3.861752e+06
8  2014  3.768738e+06
In [4]:
import plotly.express as px
fig = px.scatter(df_meandosage, x="year", y="DOSAGE_UNIT")
fig.show()
In [ ]: